In [ ]:
%matplotlib inline

import numpy as np
from numpy.fft import fft2, ifft2, fftshift, ifftshift

from scipy.stats import multivariate_normal

import matplotlib.pyplot as plt

import menpo.io as mio
from menpo.image import Image
from menpo.feature import hog, no_op
from menpo.shape import PointCloud
from menpo.visualize import visualize_images

from templatetracker.correlationfilter.kernelizedfilter import (
    gaussian_correlation, polynomial_correlation, 
    linear_correlation, learn_kcf)
from templatetracker.correlationfilter.utils import (
    build_grid, normalizenorm_vec, fast2dconv, crop)

In [ ]:
def greyscale(i):
    # average the colour channels to obtain a single-channel image
    return i.as_greyscale('average')

def greyscale_hog(i):
    # HOG features computed on the greyscale image
    return hog(greyscale(i))

def combine(i):
    # stack the colour, greyscale and HOG channels into a single image
    return Image(np.concatenate((i.pixels, greyscale(i).pixels, greyscale_hog(i).pixels)))

Kernelized Correlation Filters

Load and manipulate data

Load landmarked facial images.


In [ ]:
images = []
for i in mio.import_images('../../data/face_images/*', verbose=True, 
                           max_images=5):
    i.crop_to_landmarks_proportion_inplace(0.5)
    i = i.rescale_landmarks_to_diagonal_range(100)
    images.append(i)

In [ ]:
visualize_images(images)

Extract 101 x 101 patches around landmark number 45 (the corner of the left eye) from the previous images. Note that any other landmark could be chosen.


In [ ]:
patch_shape = np.asarray((101, 101))
lm_number = 45
features = no_op # no_op, greyscale, greyscale_hog

image_patches = []
pixel_patches = []
for i in images:
    image_patches.append(i.extract_patches_around_landmarks(patch_size=patch_shape)[lm_number])
    feature_patches = features(image_patches[-1])
    pixel_patches.append(feature_patches.pixels)

In [ ]:
visualize_images(image_patches)

Store patches as numpy array.


In [ ]:
X = np.asarray(pixel_patches)

These are the patches that we will use to define and test our Kernelized Correlation Filters.

Define the desired response for each patch. Note that, because all patches are centred on the same landmark, they share the same desired response: a 2-dimensional Gaussian centred at the middle of the patch.
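
Concretely, the response at a grid position $\mathbf{u}$ (measured from the patch centre) is the isotropic Gaussian density

$$y(\mathbf{u}) = \frac{1}{2\pi\sigma^2}\exp\left(-\frac{\|\mathbf{u}\|^2}{2\sigma^2}\right)$$

with $\sigma^2$ equal to the `cov` parameter used in the next cell.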


In [ ]:
cov = 3

# define a 2D Gaussian response over a grid matching the patch shape
mvn = multivariate_normal(mean=np.zeros(2), cov=cov)
grid = build_grid(patch_shape)
y = mvn.pdf(grid)[None]

In [ ]:
plt.title('Desired response')
plt.imshow(y[0])

Learn Kernelized Correlation Filter (KCF)

At this point we will use the first image patch as the template from which to learn a KCF. Note that we could have chosen any other image patch.
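
For reference, in the kernelized correlation filter formulation of Henriques et al. (2015), learning amounts to a kernel ridge regression solved entirely in the Fourier domain, with dual coefficients

$$\hat{\alpha} = \frac{\hat{y}}{\hat{k}^{xx} + \lambda}$$

where hats denote 2D Fourier transforms, $k^{xx}$ is the kernel autocorrelation of the template with itself, and $\lambda$ is the regularization parameter (the `l` variable below). This is presumably what `learn_kcf` computes internally.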


In [ ]:
img_number = 0
x = X[img_number]

Apart from the kernel-correlation-specific parameters, we need to make some choices regarding the overall learning procedure. In particular, normalizing the patch makes the filter less sensitive to illumination changes, and the cosine (Hanning) mask smooths the patch borders to reduce the boundary artefacts introduced by the circular correlation assumption.


In [ ]:
# whether to normalize the image
normalize = True
# whether to mask the image with a cosine mask
mask = True
# regularization parameter
l = 0.01
# the type of kernel correlation to be used
kernel_correlation = gaussian_correlation

# build the 2D cosine mask as the outer product of two Hanning windows
c1 = np.hanning(patch_shape[0])
c2 = np.hanning(patch_shape[1])
cosine_mask = c1[..., None].dot(c2[None, ...]) if mask else None

We will use 3 different kernel correlation measures, namely: Gaussian, polynomial and linear. Some of them have their own parameters.
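
For completeness, these are the multi-channel kernel correlations defined in Henriques et al. (2015), up to implementation-dependent normalization; the polynomial case is written using this implementation's apparent naming, where `a` is the exponent and `b` the constant:

$$k^{xz}_{\mathrm{gauss}} = \exp\left(-\frac{1}{\sigma^2}\left(\|x\|^2 + \|z\|^2 - 2\,\mathcal{F}^{-1}\left(\sum_c \hat{x}_c^* \odot \hat{z}_c\right)\right)\right)$$

$$k^{xz}_{\mathrm{poly}} = \left(\mathcal{F}^{-1}\left(\sum_c \hat{x}_c^* \odot \hat{z}_c\right) + b\right)^a \qquad k^{xz}_{\mathrm{linear}} = \mathcal{F}^{-1}\left(\sum_c \hat{x}_c^* \odot \hat{z}_c\right)$$

where $c$ indexes feature channels, $\hat{\cdot}$ denotes the 2D Fourier transform, $^*$ complex conjugation and $\odot$ element-wise multiplication.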


In [ ]:
if kernel_correlation == gaussian_correlation:
    # sigma: gaussian kernel std
    kwargs = {'sigma': 0.3}
elif kernel_correlation == polynomial_correlation:
    # a: polynomial exponent, b: polynomial constant 
    kwargs = {'a': 10, 'b': 1}
elif kernel_correlation == linear_correlation: 
    # no params
    kwargs = {}

We are now ready to learn a KCF for the first image patch.


In [ ]:
# apply the chosen normalization and cosine mask to the template
x_ = normalizenorm_vec(x) if normalize else x
x_ = cosine_mask * x_ if mask else x_

alpha, x_ = learn_kcf(x_, y, kernel_correlation=kernel_correlation, l=l, **kwargs)

In [ ]:
fig_size = (6, 6)
fig = plt.figure(figsize=fig_size)
plt.subplot(1, 2, 1)
plt.title('KCF in spatial domain')
plt.imshow(alpha[0])
plt.subplot(1, 2, 2)
plt.title('KCF in frequency domain')
plt.imshow(np.abs(fftshift(fft2(alpha[0]))))

Test KCF

In order to test the correctness of the learned KCF we will extract 91 x 91 patches centred around a different landmark (number 21). Note that we now expect to get responses with peaks shifted away from the patch centre, hence correctly localizing the eye corner for which the KCF was learned.
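
At test time, following Henriques et al. (2015), the response is obtained by correlating the kernel correlation map with the learned coefficients in the Fourier domain:

$$r = \mathcal{F}^{-1}\left(\hat{k}^{xz} \odot \hat{\alpha}\right)$$

which is what the combination of `kernel_correlation` and `fast2dconv` in the cells below is assumed to compute.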


In [ ]:
# landmark around which the test patches are extracted
lm_test = 21

patch_shape2 = (91, 91)

image_prime_patches = []
pixel_prime_patches = []
for i in images:
    image_prime_patches.append(i.extract_patches_around_landmarks(patch_size=patch_shape2)[lm_test])
    feature_prime_patches = features(image_prime_patches[-1])
    pixel_prime_patches.append(feature_prime_patches.pixels)

In [ ]:
visualize_images(image_prime_patches)

Store patches as numpy array.


In [ ]:
X_prime = np.asarray(pixel_prime_patches)

In [ ]:
rs = [] 
for (z, x) in zip(X_prime, X):
    z_ = normalizenorm_vec(z) if normalize else z
    x_ = normalizenorm_vec(x) if normalize else x
    
    # compute kernel correlation between template and image
    kxz = kernel_correlation(x_, z_, **kwargs) 
    # compute kernel correlation response
    r = fast2dconv(kxz, alpha)
    
    rs.append(r)

In [ ]:
# only up to the first 5 images are shown
n_images = np.minimum(5, len(X_prime))
fig_size = (3*n_images, 3*n_images)

fig = plt.figure()
fig.set_size_inches(fig_size)
for j, r in enumerate(rs[:n_images]):
    plt.subplot(1, n_images, j+1)
    plt.title('response')
    plt.imshow(r[0])

fig = plt.figure()
fig.set_size_inches(fig_size)
for j, (r, i) in enumerate(zip(rs[:n_images], image_prime_patches[:n_images])):
    plt.subplot(1, n_images, j+1)
    plt.title('original image')
    # locate the response peak; drop the leading channel index
    peak = np.asarray(np.unravel_index(r.argmax(), r.shape))[1:]
    i.landmarks['peak'] = PointCloud(peak[None, ...])
    i.view_landmarks(marker_face_colour='r', figure_size=fig_size)
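
As a quick sanity check (a minimal sketch using only variables defined above), we can print how far each response peak lies from the centre of the test patch; a consistent non-zero offset towards the learned eye corner is the expected behaviour.


In [ ]:
# sanity check: report each response peak's offset from the patch centre
centre = np.asarray(patch_shape2) // 2
for j, r in enumerate(rs):
    peak = np.asarray(np.unravel_index(r.argmax(), r.shape))[1:]
    print('patch {}: peak at {}, offset from centre {}'.format(j, peak, peak - centre))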